import pandas as pd
import numpy as np
import statsmodels.api as sm
from math import sqrt

# Root folder of the research project's replication package. File names are
# appended to this value by plain string concatenation, so it has to end with
# a path separator. (A raw string literal cannot end in a single backslash,
# which is why the literal below ends in two.)
filepath = r'C:\Users\mehran.azimi\\'

# Base name (without extension) of the results workbook written by this script.
file_name = "OOS"

def OOS():
    """Run the out-of-sample forecast test for the currently selected variables.

    The function takes no arguments and returns nothing. It reads these
    module-level names, which must all be set before the call:

    * ``data_OS``    -- DataFrame holding the columns named by ``depvar`` and
      ``indepvar`` plus ``'yearmo'`` and ``'monthnum'``.
    * ``depvar``     -- name of the column to forecast.
    * ``indepvar``   -- name of the predictor column.
    * ``taw``        -- forecast horizon, measured in rows.
    * ``resultsOOS`` -- DataFrame that receives the results.

    Rows with a missing value in any of the four columns are dropped
    (``'monthnum'`` is used for nothing else here). Roughly the first half of
    the remaining rows is the initial estimation sample. For each later
    forecast origin ``t`` an OLS regression with intercept of
    ``depvar[s + taw]`` on ``indepvar[s]`` is fitted over the estimation
    window and used to predict ``depvar[t + taw]`` from ``indepvar[t]``. The
    benchmark forecast is the mean of ``depvar`` over the window.

    The whole exercise is run twice: with ``rolling == 0`` the window start
    stays fixed (the window grows by one row per forecast), with
    ``rolling == 1`` the start advances with the origin (fixed-length window).
    Each run appends one row to ``resultsOOS``:
    ``[depvar, indepvar, R2os, CW statistic with stars, taw, CW_adj, rolling]``.

    Raises:
        ValueError: if the cleaned sample is too short to produce a single
            forecast at horizon ``taw``.
    """
    sample = data_OS[[depvar, indepvar, 'yearmo', 'monthnum']].dropna().reset_index()
    first_yearmo = sample['yearmo'].iloc[0]
    last_yearmo = sample['yearmo'].iloc[-1]

    # Row positions of the first and last period in the cleaned sample.
    firstmonthnum = sample[sample['yearmo'] == first_yearmo].index.values[0]
    lastmonth = sample[sample['yearmo'] == last_yearmo].index.values[0]
    nobservations = lastmonth - firstmonthnum + 1
    initsampsize = int((lastmonth - firstmonthnum) / 2) + 1
    n_forecasts = nobservations - initsampsize - taw
    if n_forecasts <= 0:
        # Without this guard the statistics below would be computed on empty
        # arrays and fail with an uninformative division-by-zero error.
        raise ValueError(
            f"not enough observations for an out-of-sample test of {depvar!r} "
            f"on {indepvar!r} at horizon {taw}: {nobservations} usable rows, "
            f"initial sample of {initsampsize}"
        )

    # Pull the columns out once instead of re-indexing the frame on every step.
    target = sample[depvar]
    x_all = np.array(sample[indepvar].astype(float))
    y_all = np.array(target)

    for rolling in (0, 1):
        cond_forecasts = []
        uncond_forecasts = []
        actuals = []
        for i in range(n_forecasts):
            start = firstmonthnum + rolling * i         # first predictor row in the window
            origin = firstmonthnum + initsampsize + i   # row the forecast is made from

            # NOTE(review): the regression targets run up to row
            # origin + taw - 1. Whether that uses information not yet
            # available at the forecast origin depends on how the depvar
            # columns are dated -- confirm against the data construction.
            X = sm.add_constant(x_all[start:origin])
            y = y_all[start + taw:origin + taw]
            params = sm.OLS(y, X).fit().params
            cond_forecasts.append(params[0] + params[1] * x_all[origin])

            # NOTE(review): the benchmark window is shifted by one row
            # whatever the horizon, so it matches the regression target
            # window only when taw == 1. The original author flagged this
            # line for a double check on multi-step forecasts; it is left
            # unchanged here.
            uncond_forecasts.append(target.iloc[start + 1:origin + 1].mean())
            actuals.append(target.iloc[origin + taw])

        ycond = np.asarray(cond_forecasts, dtype=float)
        yuncond = np.asarray(uncond_forecasts, dtype=float)
        yact = np.asarray(actuals, dtype=float)

        sq_err_cond = (yact - ycond) ** 2
        sq_err_uncond = (yact - yuncond) ** 2

        # Out-of-sample R2 of Campbell and Thompson (2008) (see Huang et al.
        # 2014); it lies in (-infinity, 1].
        R2os = 1 - sq_err_cond.sum() / sq_err_uncond.sum()

        # MSFE-adjusted statistic of Clark and West (2007). np.var uses its
        # default ddof=0, i.e. the population variance.
        f_mspe_adj = sq_err_uncond - sq_err_cond + (yuncond - ycond) ** 2
        CW_adj = sqrt(len(yact)) * np.mean(f_mspe_adj) / sqrt(np.var(f_mspe_adj))

        # Stars follow one-sided standard-normal critical values (1%, 5%, 10%).
        if CW_adj > 2.33:
            sig1 = '***'
        elif CW_adj > 1.645:
            sig1 = '**'
        elif CW_adj > 1.28:
            sig1 = '*'
        else:
            sig1 = ''

        resultsOOS.loc[len(resultsOOS.index)] = [
            depvar, indepvar, R2os, str(round(CW_adj, 2)) + sig1, taw, CW_adj, rolling,
        ]
        

# Inclusive sample window, compared directly against the 'yearmo' column
# (presumably encoded as YYYYMM integers -- confirm against AllData.csv).
begdate = 199002
enddate = 202212

data_OS = pd.read_csv(filepath + 'AllData.csv')
data_OS = data_OS[(data_OS['yearmo'] >= begdate) & (data_OS['yearmo'] <= enddate)]
data_OS = data_OS.reset_index()

# Candidate predictor columns.
_vars = ['sii', 'gp', 'alpha', 'ogap_dyn', 'skvw', 'dy', 'dp', 'lty', 'avgcor', 'wtexas', 'bm', 'tbl', 'vrp', 'tail', 'ep', 'svar', 'infl', 'tms', 'eg', 'dfy', 'de', 'ndrbl', 'ntis', 'fbm', 'lzrt', 'dtoat', 'dfr', 'ltr']

# Predictors that have a value in the row(s) dated 12/31/2022; every other
# candidate goes into the 2021 list, to which 'alpha' is always added.
date_filter = data_OS['date'] == '12/31/2022'
indepvars_2022 = data_OS.loc[date_filter, _vars].dropna(axis=1).columns.to_list()
# Built with a comprehension rather than a set difference so that the order
# follows _vars and is the same on every run.
indepvars_2021 = [v for v in _vars if v not in indepvars_2022] + ['alpha']

# To get results for variables that end in 2021, substitute indepvars_2022 with indepvars_2021 below
resultsOOS = pd.DataFrame(columns=['depvar', 'indepvar', 'R2os', 'CW-test', 'taw', 'CW_adj', 'rolling'])
for dep in ['logMktRF_', 'Sharpe_']:
    for var in indepvars_2022:
        for mt in [1, 3, 6]:
            # OOS() takes no arguments: it reads taw, indepvar and depvar
            # from module scope, so they are set here before each call.
            taw = mt
            indepvar = var
            depvar = dep + str(mt)
            OOS()
            print(depvar, var, mt)

resultsOOS.to_excel(filepath + file_name + '.xlsx', index=False)
 

    

